require(nlme)
require(reshape2)
require(ggplot2)
require(multcomp)
require(emmeans)
require(rstatix)
require(ggpubr)
require(RColorBrewer)
require(tidyverse)
require(dplyr)
require(ggbeeswarm)
require(readxl)
require(broom)
require(scales)
require(RCurl)
require(plotly)
require(ggrepel)
require(gridExtra)
require(ggExtra)
require(DT)
source("./HelperFunctions.R")


theme_point<-theme_bw()+theme(strip.background = element_blank())
theme_bar<-theme_bw()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank())
theme_boxplot<-theme_classic()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.title.x = element_blank(),axis.line.x = element_blank(),legend.position = "none")
color_panel<-c("#e35d6a","#5bb75b","#428bca","#e87810","#23496b","#ffbf00","#cc2028","#039748","pink","darkgray")
names(color_panel)<-sort(c("serum","Biomatrica","EDTA separator","EDTA","Citrate","RNA Streck","Roche","DNA Streck", "PAXgene", "ACD-A"))
set.seed(1)

#Introduction

For every metric used to compare kit-tube combinations, we tested the possibility of interaction. We build a linear mixed-effects model with tube, RNA isolation kit and timelapse as fixed effects and donorID as random effect.

1 Annotation

Sample annotation with info about tube, kit, used input volume, eluate volume etc., volume unit is µL

#Annotation
sample_annotation<-read_tsv("./Annotation_exRNAQC017.csv")
colnames(sample_annotation)[23]<-'TimeInterval' 
sample_annotation<-sample_annotation %>% mutate(donorID= case_when((Donor == "PNL-XNID") ~ "D5",
                                                                   (Donor == "PNL-7DEN") ~ "D1",
                                                                   (Donor == "PNL-8ZI1") ~ "D2",
                                                                   (Donor == "PNL-NLID") ~ "D3",
                                                                   (Donor == "PNL-UCH7") ~ "D4"), 
                                                SampleID= paste0(GraphKit,"_",GraphTube,"_",TimeInterval,"_",donorID),
                                                tube_kitID=paste0(GraphTube,"_",GraphKit),
                                                tube_kit_timeID=paste0(GraphTube,"_",GraphKit,"_",TimeInterval),
                                                PlasmaInputVml= PlasmaInputV/1000)
    
sample_annotation$Tube<-as.factor(sample_annotation$Tube)
sample_annotation$RNAisolation<-as.factor(sample_annotation$RNAisolation)
sample_annotation$TimeInterval<-as.factor(sample_annotation$TimeInterval)
sample_annotation$donorID<-as.factor(sample_annotation$donorID)

#Reads
miRNAs <- data.table::fread("mergedTxts/allmiRs_subs.txt", header=T, data.table = F)
spikes <- data.table::fread("mergedTxts/allspikes_subs.txt", header=T, data.table = F)
reads <- data.table::fread("mergedTxts/allreads_subs.txt", header=T, data.table = F)
contam <- data.table::fread("mergedTxts/allcontam_subs.txt", header=T, data.table = F)

reads_total<- read_tsv("./smallRNA_table.tsv")
reads_total <- full_join(reads_total, sample_annotation, by = "RNAID")

Tube.levels<-levels(reads_total$Tube)
RNAisolation.levels<-levels(reads_total$RNAisolation)
TimeInterval.levels<-levels(reads_total$TimeInterval)

#ggplot(reads_total, aes(x = TimeInterval, y = total_reads, group = donorID:RNAisolation)) +
#  geom_line(aes(color=RNAisolation))+geom_point() + facet_grid(. ~ Tube) + labs(title = "Raw Number of Reads")

spikes_sum <- spikes %>% gather(key="UniqueID",value="spikecounts",-"spikeID") %>% 
  group_by(UniqueID) %>% dplyr::summarise(counts=sum(spikecounts, na.rm=T)) %>%
  #spread(key=UniqueID,value=spikesum) %>% 
  mutate(reads="spikes")

# add sum of spikes to the total mapped reads (these were not considered as "mapped" yet)
reads_complete <- reads %>% gather(key="UniqueID",value="counts",-"reads") %>% 
  rbind(., spikes_sum) %>%
  spread(key=reads, value=counts) %>% 
  mutate(all_mapped = mapped + spikes) %>% 
  gather(key="reads",value="counts",-"UniqueID")

DT::datatable(sample_annotation %>% dplyr::select(c("RNAID","RNAisolation","PlasmaInputV","donorID","Tube","TimeInterval","Sequinconc","ERCCconc",Abbrevation="GraphKit")), rownames = TRUE, filter="top", options = list(pageLength = 10, scrollX=T), caption = "Sample annotation table")

2 RNA concentration

2.1 Endogenous vs LP spikes: relative small RNA concentration in eluate

calculation: count mapped miRNAS / count mapped LP spikes

gene_level_ratios <- rbind(
  reads %>% 
  filter(reads=="mapped_miR") %>%
  mutate(type="endogenous") %>%
    group_by(type) %>%
    dplyr::summarise_if(is.numeric, sum, na.rm=TRUE),spikes %>%
    mutate(type=gsub(".-..$","",spikeID)) %>%
    group_by(type) %>%
    dplyr::summarise_if(is.numeric, sum, na.rm=TRUE)
  ) %>%
  gather(., key="UniqueID",value="counts",-type) %>%  #long format
  spread(., key = "type", value="counts") %>%
  mutate("LPvsEndo"=LP/endogenous, "RCvsEndo"=RC/endogenous, "EndovsRC"=endogenous/RC, "EndovsLP"= endogenous/LP) %>%
  left_join(., sample_annotation %>%
              dplyr::select(c("UniqueID","RNAID","RNAisolation","Tube","SampleID","GraphTube","EluateV","PlasmaInputV", "BiologicalReplicate", "TimeInterval","Donor", "donorID")), by=c("UniqueID" = "RNAID"))  #add annotation
gene_level_ratios <- gene_level_ratios %>%
  mutate("EndovsLP_InputCorr"= (EndovsLP)*EluateV/PlasmaInputV)

Tube.levels<-levels(gene_level_ratios$Tube)
RNAisolation.levels<-levels(gene_level_ratios$RNAisolation)
TimeInterval.levels<-levels(gene_level_ratios$TimeInterval)

Linear mixed-effects model with crossed fixed effects of tube, kit and TimeInterval.

EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=gene_level_ratios)

anova(EndovsLP_m1)
EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_m1)

Only Tube, and the interactions Tube:RNAisolation and Tube:TimeInterval are significant. There are no significant three-way interactions.

Next, we look at second-order interactions:

EndovsLP_m2<-lme(EndovsLP~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_m2)
EndovsLP_m3<-lme(EndovsLP~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_m3)

checking normality of residuals

qqnorm(EndovsLP_m3$residuals)
qqline(EndovsLP_m3$residuals)

For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.

EndovsLP_m.time<-emmeans(EndovsLP_m3, ~TimeInterval|Tube)
plot(EndovsLP_m.time, comparisons=TRUE)

pairs(EndovsLP_m.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     -6.34 33.1 74  -0.191  0.9800
##  T0 - T16    -11.16 33.1 74  -0.337  0.9395
##  T04 - T16    -4.82 33.1 74  -0.145  0.9884
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     19.18 33.1 74   0.579  0.8318
##  T0 - T16   -113.14 33.1 74  -3.414  0.0030
##  T04 - T16  -132.33 33.1 74  -3.993  0.0004
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     24.20 33.1 74   0.730  0.7464
##  T0 - T16     58.03 33.1 74   1.751  0.1933
##  T04 - T16    33.83 33.1 74   1.021  0.5660
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

Now, for both isolation kits the effect of tube is investigated

EndovsLP_m.method<-emmeans(EndovsLP_m3, ~Tube|RNAisolation)
plot(EndovsLP_m.method, comparisons=TRUE)

pairs(EndovsLP_m.method)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -85.1 27.1 74  -3.144  0.0067
##  Citrate - serum    -14.2 27.1 74  -0.526  0.8587
##  EDTA - serum        70.8 27.1 74   2.617  0.0286
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -94.0 27.1 74  -3.473  0.0025
##  Citrate - serum   -109.0 27.1 74  -4.027  0.0004
##  EDTA - serum       -15.0 27.1 74  -0.554  0.8447
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

2.2 Endogenous vs RC spikes: relative smallRNA in plasma

calculation: count mapped miRNAS / count mapped RC spikes

gene_level_ratios$GraphTube2<- paste(gene_level_ratios$Tube, gene_level_ratios$TimeInterval, sep = "_") 
test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsRC", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes2 <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) + 
  #geom_bar(position=position_dodge(), stat="identity")+
  geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
  geom_point(aes(colour=Tube), size=1.2) +
  geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
  geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
  facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
  labs(x="", y="relative small RNA conc. (rescaled)", title=NULL, col = NULL, fill = NULL) +
  scale_colour_manual(values=color_panel) +
  scale_y_log10() +
  theme_bar 
ggplotly(spikes2)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

EndovsRC_m1<-lme(EndovsRC~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsRC_m1)
anova_EndovsRC_m1 <- round(anova(EndovsRC_m1)[c(5, 6, 7), c(4)], digits = 3)

Second order interactions

EndovsRC_m2<-lme(EndovsRC~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsRC_m2)

Only significant interactions

EndovsRC_m3<-lme(EndovsRC~Tube+RNAisolation+TimeInterval+Tube:TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsRC_m3)

For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.

EndovsRC.time<-emmeans(EndovsRC_m3, ~TimeInterval|Tube)
plot(EndovsRC.time, comparisons=TRUE)

pairs(EndovsRC.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04   -17.647 18.1 76  -0.976  0.5945
##  T0 - T16   -18.139 18.1 76  -1.003  0.5774
##  T04 - T16   -0.493 18.1 76  -0.027  0.9996
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04    13.113 18.1 76   0.725  0.7495
##  T0 - T16   -51.912 18.1 76  -2.870  0.0145
##  T04 - T16  -65.025 18.1 76  -3.595  0.0016
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04    16.067 18.1 76   0.888  0.6493
##  T0 - T16    18.463 18.1 76   1.021  0.5662
##  T04 - T16    2.396 18.1 76   0.132  0.9904
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

EDTA has a significantly higher smallRNA concentration in plasma at time T16.

2.3 RNA Extraction efficiency

Calculation: Endo/LP * EluateV/PlasmaInputV

test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsLP_InputCorr", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes_conc <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) + 
  #geom_bar(position=position_dodge(), stat="identity")+
  facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
  geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
  geom_point(aes(colour=Tube), size=1.2) +
  geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
  geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
  labs(x="", y="relative endogenous small RNA concentration", title=NULL, subtitle="endogenous small RNA vs LP (rescaled)", col = NULL) +
  scale_colour_manual(values=color_panel) +
  scale_y_log10() +
  theme_bar 

ggplotly(spikes_conc)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

EndovsLP_InputCorr_m1<-lme(EndovsLP_InputCorr~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_InputCorr_m1)
anova_EndovsLP_InputCorr_m1 <- round(anova(EndovsLP_InputCorr_m1)[c(5, 6, 7), c(4)], digits = 3)

Second order interactions

EndovsLP_InputCorr_m2<-lme(EndovsLP_InputCorr~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_InputCorr_m2)

keeping only significant interactions

EndovsLP_InputCorr_m3<-lme(EndovsLP_InputCorr~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random= ~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_InputCorr_m3)

Checking normality of residuals

qqnorm(EndovsLP_InputCorr_m3$residuals)
qqline(EndovsLP_InputCorr_m3$residuals)

For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.

EndovsLP_InputCorr_m.time<-emmeans(EndovsLP_InputCorr_m3, ~TimeInterval|Tube)
plot(EndovsLP_InputCorr_m.time, comparisons=TRUE)

pairs(EndovsLP_InputCorr_m.time)
## Tube = Citrate:
##  contrast  estimate  SE df t.ratio p.value
##  T0 - T04    -0.656 1.7 74  -0.386  0.9212
##  T0 - T16    -1.040 1.7 74  -0.612  0.8138
##  T04 - T16   -0.384 1.7 74  -0.226  0.9722
## 
## Tube = EDTA:
##  contrast  estimate  SE df t.ratio p.value
##  T0 - T04     1.602 1.7 74   0.943  0.6149
##  T0 - T16    -6.004 1.7 74  -3.535  0.0020
##  T04 - T16   -7.606 1.7 74  -4.478  0.0001
## 
## Tube = serum:
##  contrast  estimate  SE df t.ratio p.value
##  T0 - T04     1.407 1.7 74   0.828  0.6866
##  T0 - T16     2.703 1.7 74   1.591  0.2559
##  T04 - T16    1.295 1.7 74   0.763  0.7270
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

For both isolation kits, the effect of tube is investigated.

EndovsLP_InputCorr_m.method<-emmeans(EndovsLP_InputCorr_m3, ~Tube|RNAisolation)
plot(EndovsLP_InputCorr_m.method, comparisons=TRUE)

pairs(EndovsLP_InputCorr_m.method)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -8.51 1.39 74  -6.133  <.0001
##  Citrate - serum    -1.42 1.39 74  -1.027  0.5624
##  EDTA - serum        7.08 1.39 74   5.106  <.0001
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -3.13 1.39 74  -2.258  0.0682
##  Citrate - serum    -3.63 1.39 74  -2.619  0.0285
##  EDTA - serum       -0.50 1.39 74  -0.360  0.9310
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

3 Gene Count (Sensitivity)

number of miRNA with counts > 6

cutoff_kit <- data.frame(Tube = sample_annotation$Tube, GraphTube = sample_annotation$GraphTube, median_th = 3)
miRNAs_long <-  miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
  left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate)), by=c("UniqueID" = "RNAID"))
#keep only miRNA coding genes with more than 0 counts

miRNAs_long <-  miRNAs_long %>% filter(est_counts > 0)

number_miR_detected <-  miRNAs_long %>% group_by(SampleID) %>% dplyr::summarize(miR_above0=n()) 

number_miR_detected <- full_join(number_miR_detected, 
                                    miRNAs_long %>% group_by(SampleID) %>%
                                     dplyr::summarize(total_est_counts_above0=sum(est_counts)), 
                                   by="SampleID")
#cutoff_kit <- single_pos %>% group_by(GraphTube) %>% dplyr::summarise(median_th = median(filter_threshold))
 miRNAs_cutoff <-  miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
  left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate, TimeInterval)), by=c("UniqueID" = "RNAID")) 
 
  #left_join(., cutoff_kit, by="GraphTube") #add the median cut-off for each kit
 miRNAs_cutoff <-  miRNAs_cutoff %>% 
  filter(est_counts > 6)
 
number_miR_detected <- full_join(number_miR_detected, 
                                    miRNAs_cutoff %>% group_by(SampleID) %>%
                                     dplyr::summarize(miR_aboveTh=n()),
                                   by="SampleID")
number_miR_detected <- full_join(number_miR_detected, 
                                    miRNAs_cutoff %>% group_by(SampleID) %>%
                                     dplyr::summarize(total_est_counts_aboveTh=sum(est_counts)), 
                                   by="SampleID")
number_miR_detected <- left_join(number_miR_detected, 
                                   dplyr::select(sample_annotation, c(UniqueID,RNAID, GraphTube, GraphKit, RNAisolation, EluateV,SampleID, Tube, BiologicalReplicate, TimeInterval, donorID)),
                                   by="SampleID")
#convert to the original format
 miRNAs_cutoff <- dplyr::select(miRNAs_cutoff, MIMATID, UniqueID, est_counts, Tube, BiologicalReplicate, TimeInterval) %>% 
  spread(., key=UniqueID, value=est_counts)
#write.csv( miRNAs_cutoff, file="../../../exRNAQC/ miRNAs_cutoff.csv",row.names=FALSE, na="")

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

count_m1<-lme(miR_aboveTh~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=number_miR_detected)

anova(count_m1)
anova_count_m1 <- round(anova(count_m1)[c(5, 6, 7), c(4)], digits = 3)

Second order interactions

count_m2<-lme(miR_aboveTh~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=number_miR_detected)

anova(count_m2)

Only significant interactions

count_m3<-lme(miR_aboveTh~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random=~1|donorID, 
       data=number_miR_detected)

anova(count_m3)

Checking normality of residuals

qqnorm(count_m3$residuals)
qqline(count_m3$residuals)

For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.

count_m3.time<-emmeans(count_m3, ~TimeInterval|Tube)
plot(count_m3.time, comparisons=TRUE)

pairs(count_m3.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04      15.2 5.24 74   2.901  0.0134
##  T0 - T16      26.2 5.24 74   5.000  <.0001
##  T04 - T16     11.0 5.24 74   2.099  0.0969
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04      -0.5 5.24 74  -0.095  0.9950
##  T0 - T16     -12.5 5.24 74  -2.385  0.0508
##  T04 - T16    -12.0 5.24 74  -2.290  0.0635
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04       1.6 5.24 74   0.305  0.9499
##  T0 - T16       5.6 5.24 74   1.069  0.5364
##  T04 - T16      4.0 5.24 74   0.763  0.7265
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

Pair-wise comparison T test

#T test
# Pairwise comparisons
count_pwc <-  number_miR_detected %>% group_by(TimeInterval) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")


# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
  geom_boxplot() +
  geom_point() +
  facet_wrap(~ as.factor(TimeInterval)) +
  stat_pvalue_manual(count_pwc, label = "p.adj") +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
  labs(y = "sensitivity")
count_plot

at T16 Citrate has significant lower sensitivity than the other tubes

pairs(count_m3.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04      15.2 5.24 74   2.901  0.0134
##  T0 - T16      26.2 5.24 74   5.000  <.0001
##  T04 - T16     11.0 5.24 74   2.099  0.0969
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04      -0.5 5.24 74  -0.095  0.9950
##  T0 - T16     -12.5 5.24 74  -2.385  0.0508
##  T04 - T16    -12.0 5.24 74  -2.290  0.0635
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04       1.6 5.24 74   0.305  0.9499
##  T0 - T16       5.6 5.24 74   1.069  0.5364
##  T04 - T16      4.0 5.24 74   0.763  0.7265
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

For both isolation kits the effect of tubes is investigated

count_m3.method<-emmeans(count_m3, ~Tube|RNAisolation)
plot(count_m3.method, comparisons=TRUE)

pairs(count_m3.method)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA      24.2 4.28 74   5.656  <.0001
##  Citrate - serum     -3.8 4.28 74  -0.888  0.6495
##  EDTA - serum       -28.0 4.28 74  -6.544  <.0001
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -24.5 4.28 74  -5.718  <.0001
##  Citrate - serum    -39.8 4.28 74  -9.302  <.0001
##  EDTA - serum       -15.3 4.28 74  -3.584  0.0017
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

Pair-wise comparison T test

# Pairwise comparisons
count_pwc <-  number_miR_detected %>% group_by(GraphKit) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")


# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
  geom_boxplot() +
  geom_point() +
  facet_wrap(~ as.factor(GraphKit)) +
  stat_pvalue_manual(count_pwc, label = "p.adj", y.position = c(215, 225, 235)) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
  labs(y = "sensitivity")
count_plot

#ggsave("./smallSensitivity.pdf", plot = count_plot, dpi = 300)

4 Area left of the curve (ALC)

  • The ALC (area-left-of-curve) calculation is done according to see Mestdagh et al., Nature Methods, 2014, and represents a precision metric:
    1. Only genes that reach threshold (95% single positive (SP) elimination based on exRNAQC004) are taken into account.
    2. All zero counts are replaced by NA
    3. The log2 ratios of counts for each gene are determined, each time between 2 timepoints
    4. Then, the absolute value of these log2 ratios are taken and ranked. This is plotted for every condition
  • The faster the curve reaches 100% -> the smaller the ALC -> the better the precision (indicates that the replicates give similar counts for each gene)

The ALC values are summarized based on the average of the ALC values (after removing NAs). The dot plot at the end gives an overview of all these values.

Score: lower ALC = better

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

ALC_m1<-lme(ALC_calc~Tube*RNAisolation*TimePoint,
       random=~1|BiologicalReplicate, 
       data=ALC)

anova(ALC_m1)
anova_ALC_m1 <- round(anova(ALC_m1)[c(5, 6, 7), c(4)], digits = 3)

There is a significant interaction between Tube:RNAIsolatiom

Now, second-order interactions

ALC_m2<-lme(ALC_calc~(Tube+RNAisolation+TimePoint)^2,
       random=~1|BiologicalReplicate, 
       data=ALC)

anova(ALC_m2)

There interaction between Tube:RNAIsolatiom remains significant.

Now, the model keeping only the significant interaction

ALC_m3<-lme(ALC_calc~Tube+RNAisolation+TimePoint+Tube:RNAisolation,
       random=~1|BiologicalReplicate, 
       data=ALC)

anova(ALC_m3)

Checking normality of residuals

qqnorm(ALC_m3$residuals)
qqline(ALC_m3$residuals)

For both RNA isolation methods, the effect of tube is investigated. The data is collapsed over time.

ALC_m3.methods<-emmeans(ALC_m3, ~Tube|RNAisolation)
plot(ALC_m3.methods, comparisons=TRUE)

pairs(ALC_m3.methods)
## RNAisolation = Maxwell:
##  contrast        estimate     SE df t.ratio p.value
##  Citrate - EDTA   0.00600 0.0109 49   0.548  0.8480
##  Citrate - serum  0.02292 0.0109 49   2.093  0.1017
##  EDTA - serum     0.01692 0.0109 49   1.545  0.2790
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate     SE df t.ratio p.value
##  Citrate - EDTA   0.04688 0.0109 49   4.283  0.0002
##  Citrate - serum  0.04910 0.0109 49   4.485  0.0001
##  EDTA - serum     0.00222 0.0109 49   0.202  0.9777
## 
## Results are averaged over the levels of: TimePoint 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

Pair-wise T test

# Pairwise comparisons
ALC_pwc <- ALC %>% group_by(RNAisolation) %>% pairwise_t_test(ALC_calc ~ Tube) %>% add_xy_position(x = "Tube")

# Show p-values
alc_plot<-ggplot(ALC, aes(x = Tube, y = ALC_calc, color = Tube)) +
  geom_boxplot()+
  geom_point()+
  scale_colour_manual(values=color_panel)+
  facet_wrap(~ RNAisolation) +
  stat_pvalue_manual(ALC_pwc, label = "p.signif") +
  theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
  labs(title = "Tube comparison within kits (T test)", y = "Area left of the curve")
alc_plot

5 Biotype: Percentage of miRNAs counts

We are usually interested in miRNAs, and the other biotypes are often seen as contaminants. The fraction was calculated as the number of miRNA reads / total reads (mapped + spikes)

perc_miRs <- miRNAs %>% gather(key="UniqueID",value="counts", -"MIMATID") %>% mutate(type=gsub("[0-9].*$","",MIMATID)) %>%
  dplyr::group_by(UniqueID,type) %>% 
  dplyr::summarise(sum_counts = sum(counts, na.rm=T)) %>% #calculate sum of MIMAT per sample
  full_join(reads_complete %>% filter(reads=="all_mapped")) %>% 
  mutate(perc=sum_counts/counts*100) %>% #calculate perc of miRs and spikes of total mapped reads
  left_join(sample_annotation, by=c("UniqueID" = "RNAID"))

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

Biotype_m1<-lme(perc~Tube*RNAisolation*TimeInterval,
       random=~1|BiologicalReplicate, 
       data=perc_miRs)

anova(Biotype_m1)
anova_Biotype_m1 <- round(anova(Biotype_m1)[c(5, 6, 7), c(4)], digits = 3)

Second order interactions:

Biotype_m2<-lme(perc~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|BiologicalReplicate, 
       data=perc_miRs)

anova(Biotype_m2)

Keeping only the significant interactions:

Biotype_m3<-lme(perc~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random=~1|BiologicalReplicate, 
       data=perc_miRs)
anova(Biotype_m3)

For both isolation kits the effect of tubes was investigated.

Biotype_m3.methods<-emmeans(Biotype_m3, ~Tube|RNAisolation)
plot(Biotype_m3.methods, comparisons=TRUE)

pairs(Biotype_m3.methods)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -6.09 2.46 78  -2.474  0.0407
##  Citrate - serum    14.81 2.46 78   6.021  <.0001
##  EDTA - serum       20.90 2.46 78   8.495  <.0001
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA    -38.44 2.46 78 -15.628  <.0001
##  Citrate - serum   -15.22 2.46 78  -6.189  <.0001
##  EDTA - serum       23.22 2.46 78   9.439  <.0001
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

Pair-wise T test

# Pairwise comparisons
Biotype_pwc <- perc_miRs %>% group_by(RNAisolation) %>% pairwise_t_test(perc ~ Tube) %>% add_xy_position(x = "Tube")

# Show p-values
Biotype_plot<-ggplot(perc_miRs, aes(x = Tube, y = perc, color = Tube)) +
  geom_boxplot()+
  geom_point()+
  scale_colour_manual(values=color_panel)+
  facet_wrap(~ RNAisolation) +
  stat_pvalue_manual(Biotype_pwc, label = "p.signif") +
  theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
  labs(title = "Tube comparison within kits (T test)", y = "miRNA counts (%)")
Biotype_plot

For all tubes the effect of time interval was investigated

Biotype_m3.time<-emmeans(Biotype_m3, ~TimeInterval|Tube)
plot(Biotype_m3.time, comparisons=TRUE)

pairs(Biotype_m3.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     0.590 3.01 78   0.196  0.9791
##  T0 - T16    13.105 3.01 78   4.350  0.0001
##  T04 - T16   12.515 3.01 78   4.154  0.0002
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     1.672 3.01 78   0.555  0.8442
##  T0 - T16     2.018 3.01 78   0.670  0.7816
##  T04 - T16    0.346 3.01 78   0.115  0.9928
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     4.579 3.01 78   1.520  0.2873
##  T0 - T16     7.874 3.01 78   2.614  0.0286
##  T04 - T16    3.295 3.01 78   1.094  0.5208
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

6 Conclusion

Highly significant interactions were found between Tube:RNAisolation and Tube:TimeInterval for metrics: extraction efficiency, sensitivity, and biotype. No significant interaction between RNAisolation:Timeinterval.

overview <- data.frame(row.names = c("Tube:RNAisolation","Tube:TimeInterval","RNAisolation:TimeInterval"), "RNA concentration" = anova_EndovsRC_m1, "extraction efficiency" = anova_EndovsLP_InputCorr_m1, "sensitivity" = anova_count_m1, "reproducibility" = anova_ALC_m1, "biotype" = anova_Biotype_m1, check.names = FALSE)
overview_t <- t(overview)

#breaks for heatmap
bk1 <- c(seq(0,0.051,by=0.005))

#make color palette
custom_palette = c(colorRampPalette(brewer.pal(n = 7, name =
  "RdBu"))(20))
custom_palette2 = c(custom_palette[1:10], 'white')

#make matrix with labels for heatmap
labels_tab <- overview_t
labels_tab[labels_tab<0.01]<-"<0.01"

#make heatmap
phm<-pheatmap::pheatmap(overview_t,
                   breaks = bk1,
                   display_numbers = labels_tab,fontsize_number=11, number_color="black",
                   color = custom_palette2,
                   angle_col = 45,
                   cluster_rows=FALSE, cluster_cols=FALSE)

ggsave("./smallInteractions2.pdf", plot = phm, dpi = 300)
## Saving 7 x 5 in image